import lxml.etree as le
# Import lxml's etree module (aliased as le) for HTML parsing and XPath queries.

# Read the saved page into memory. The file handle is only needed for the
# read itself, so parsing and printing happen after the `with` has closed it.
with open('edu.html', 'r', encoding='utf-8') as f:
    html = f.read()

# Parse the HTML string into an lxml element tree that can be queried
# with XPath.
html_x = le.HTML(html)

# Select every <div class="classify_cList">; each one is read below for a
# first-level category name and its list of second-level category names.
div_x_s = html_x.xpath('//div[@class="classify_cList"]')

data_s = []
for div_x in div_x_s:
    # "." anchors the query at the current <div>. The first-level category
    # is the text of the <a> inside the block's <h3>.
    heading_texts = div_x.xpath('./h3/a/text()')
    if not heading_texts:
        # xpath() returns an empty list when nothing matches; indexing it
        # with [0] would raise IndexError and abort the whole run, so a
        # block without a heading link is skipped instead.
        continue

    data_s.append(
        dict(
            category1=heading_texts[0],
            # Second-level categories: text of every <a> under ./div/span.
            category2_s=div_x.xpath('./div/span/a/text()'),
        )
    )

# Print each first-level category followed by its indented sub-categories.
for data in data_s:
    print(data['category1'])
    for category2 in data['category2_s']:
        print('    ', category2)

